{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# -*- utf-8 -*-\n",
    "# 1 数据抽取\n",
    "# 2 数据探索分析\n",
    "#   通过频率分布直方图分析用户用水停顿时间间隔的规律性--->探究划分一次完整用水事件的时间间隔阈值\n",
    "# \n",
    "# 3 数据预处理\n",
    "# （1）数据规约 data_guiyue.py\n",
    "# -*- utf-8 -*-\n",
    "# 规约掉\"热水器编号\"、\"有无水流\"、\"节能模式\"三个属性\n",
    "# 注意：\n",
    "#书中提到：规约掉热水器\"开关机状态\"==\"关\"且”水流量”==0的数据，说明热水器不处于工作状态，数据记录可以规约掉。但由后文知，此条件不能进行规约\n",
    "# 因为，\"开关机状态\"==\"关\"且”水流量”==0可能是一次用水中的停顿部分，删掉后则无法准确计算关于停顿的数据\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>热水器编号</th>\n",
       "      <th>发生时间</th>\n",
       "      <th>开关机状态</th>\n",
       "      <th>加热中</th>\n",
       "      <th>保温中</th>\n",
       "      <th>有无水流</th>\n",
       "      <th>实际温度</th>\n",
       "      <th>热水量</th>\n",
       "      <th>水流量</th>\n",
       "      <th>节能模式</th>\n",
       "      <th>加热剩余时间</th>\n",
       "      <th>当前设置温度</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>R_00001</td>\n",
       "      <td>20141019063917</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>无</td>\n",
       "      <td>30°C</td>\n",
       "      <td>0%</td>\n",
       "      <td>0</td>\n",
       "      <td>关</td>\n",
       "      <td>0分钟</td>\n",
       "      <td>50°C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>R_00001</td>\n",
       "      <td>20141019070154</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>无</td>\n",
       "      <td>30°C</td>\n",
       "      <td>0%</td>\n",
       "      <td>0</td>\n",
       "      <td>关</td>\n",
       "      <td>0分钟</td>\n",
       "      <td>50°C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>R_00001</td>\n",
       "      <td>20141019070156</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>无</td>\n",
       "      <td>30°C</td>\n",
       "      <td>0%</td>\n",
       "      <td>8</td>\n",
       "      <td>关</td>\n",
       "      <td>0分钟</td>\n",
       "      <td>50°C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>R_00001</td>\n",
       "      <td>20141019071230</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>无</td>\n",
       "      <td>30°C</td>\n",
       "      <td>0%</td>\n",
       "      <td>0</td>\n",
       "      <td>关</td>\n",
       "      <td>0分钟</td>\n",
       "      <td>50°C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>R_00001</td>\n",
       "      <td>20141019071236</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>无</td>\n",
       "      <td>29°C</td>\n",
       "      <td>0%</td>\n",
       "      <td>0</td>\n",
       "      <td>关</td>\n",
       "      <td>0分钟</td>\n",
       "      <td>50°C</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     热水器编号            发生时间 开关机状态 加热中 保温中 有无水流  实际温度 热水量  水流量 节能模式 加热剩余时间  \\\n",
       "0  R_00001  20141019063917     关   关   关    无  30°C  0%    0    关    0分钟   \n",
       "1  R_00001  20141019070154     关   关   关    无  30°C  0%    0    关    0分钟   \n",
       "2  R_00001  20141019070156     关   关   关    无  30°C  0%    8    关    0分钟   \n",
       "3  R_00001  20141019071230     关   关   关    无  30°C  0%    0    关    0分钟   \n",
       "4  R_00001  20141019071236     关   关   关    无  29°C  0%    0    关    0分钟   \n",
       "\n",
       "  当前设置温度  \n",
       "0   50°C  \n",
       "1   50°C  \n",
       "2   50°C  \n",
       "3   50°C  \n",
       "4   50°C  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from pandas import DataFrame\n",
    "or_data = pd.read_excel('original_data.xls',encoding='gbk')\n",
    "or_data.head()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>发生时间</th>\n",
       "      <th>开关机状态</th>\n",
       "      <th>加热中</th>\n",
       "      <th>保温中</th>\n",
       "      <th>实际温度</th>\n",
       "      <th>热水量</th>\n",
       "      <th>水流量</th>\n",
       "      <th>加热剩余时间</th>\n",
       "      <th>当前设置温度</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>20141019063917</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>30°C</td>\n",
       "      <td>0%</td>\n",
       "      <td>0</td>\n",
       "      <td>0分钟</td>\n",
       "      <td>50°C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>20141019070154</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>30°C</td>\n",
       "      <td>0%</td>\n",
       "      <td>0</td>\n",
       "      <td>0分钟</td>\n",
       "      <td>50°C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>20141019070156</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>30°C</td>\n",
       "      <td>0%</td>\n",
       "      <td>8</td>\n",
       "      <td>0分钟</td>\n",
       "      <td>50°C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>20141019071230</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>30°C</td>\n",
       "      <td>0%</td>\n",
       "      <td>0</td>\n",
       "      <td>0分钟</td>\n",
       "      <td>50°C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>20141019071236</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>29°C</td>\n",
       "      <td>0%</td>\n",
       "      <td>0</td>\n",
       "      <td>0分钟</td>\n",
       "      <td>50°C</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             发生时间 开关机状态 加热中 保温中  实际温度 热水量  水流量 加热剩余时间 当前设置温度\n",
       "0  20141019063917     关   关   关  30°C  0%    0    0分钟   50°C\n",
       "1  20141019070154     关   关   关  30°C  0%    0    0分钟   50°C\n",
       "2  20141019070156     关   关   关  30°C  0%    8    0分钟   50°C\n",
       "3  20141019071230     关   关   关  30°C  0%    0    0分钟   50°C\n",
       "4  20141019071236     关   关   关  29°C  0%    0    0分钟   50°C"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = or_data.drop(or_data.columns[[0,5,9]],axis=1) # 删掉不相关属性\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 18840 entries, 0 to 18839\n",
      "Data columns (total 9 columns):\n",
      "发生时间      18840 non-null int64\n",
      "开关机状态     18840 non-null object\n",
      "加热中       18840 non-null object\n",
      "保温中       18840 non-null object\n",
      "实际温度      18840 non-null object\n",
      "热水量       18840 non-null object\n",
      "水流量       18840 non-null int64\n",
      "加热剩余时间    18840 non-null object\n",
      "当前设置温度    18840 non-null object\n",
      "dtypes: int64(2), object(7)\n",
      "memory usage: 1.3+ MB\n"
     ]
    }
   ],
   "source": [
    "data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "18840\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>发生时间</th>\n",
       "      <th>开关机状态</th>\n",
       "      <th>加热中</th>\n",
       "      <th>保温中</th>\n",
       "      <th>实际温度</th>\n",
       "      <th>热水量</th>\n",
       "      <th>水流量</th>\n",
       "      <th>加热剩余时间</th>\n",
       "      <th>当前设置温度</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2014-10-19 06:39:17</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>30°C</td>\n",
       "      <td>0%</td>\n",
       "      <td>0</td>\n",
       "      <td>0分钟</td>\n",
       "      <td>50°C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2014-10-19 07:01:54</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>30°C</td>\n",
       "      <td>0%</td>\n",
       "      <td>0</td>\n",
       "      <td>0分钟</td>\n",
       "      <td>50°C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2014-10-19 07:01:56</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>30°C</td>\n",
       "      <td>0%</td>\n",
       "      <td>8</td>\n",
       "      <td>0分钟</td>\n",
       "      <td>50°C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2014-10-19 07:12:30</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>30°C</td>\n",
       "      <td>0%</td>\n",
       "      <td>0</td>\n",
       "      <td>0分钟</td>\n",
       "      <td>50°C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2014-10-19 07:12:36</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>29°C</td>\n",
       "      <td>0%</td>\n",
       "      <td>0</td>\n",
       "      <td>0分钟</td>\n",
       "      <td>50°C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2014-10-19 07:16:02</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>30°C</td>\n",
       "      <td>0%</td>\n",
       "      <td>0</td>\n",
       "      <td>0分钟</td>\n",
       "      <td>50°C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>2014-10-19 07:16:08</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>29°C</td>\n",
       "      <td>0%</td>\n",
       "      <td>0</td>\n",
       "      <td>0分钟</td>\n",
       "      <td>50°C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>2014-10-19 07:20:05</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>30°C</td>\n",
       "      <td>0%</td>\n",
       "      <td>0</td>\n",
       "      <td>0分钟</td>\n",
       "      <td>50°C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>2014-10-19 07:20:10</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>29°C</td>\n",
       "      <td>0%</td>\n",
       "      <td>0</td>\n",
       "      <td>0分钟</td>\n",
       "      <td>50°C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>2014-10-19 07:21:53</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>关</td>\n",
       "      <td>30°C</td>\n",
       "      <td>0%</td>\n",
       "      <td>0</td>\n",
       "      <td>0分钟</td>\n",
       "      <td>50°C</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 发生时间 开关机状态 加热中 保温中  实际温度 热水量  水流量 加热剩余时间 当前设置温度\n",
       "0 2014-10-19 06:39:17     关   关   关  30°C  0%    0    0分钟   50°C\n",
       "1 2014-10-19 07:01:54     关   关   关  30°C  0%    0    0分钟   50°C\n",
       "2 2014-10-19 07:01:56     关   关   关  30°C  0%    8    0分钟   50°C\n",
       "3 2014-10-19 07:12:30     关   关   关  30°C  0%    0    0分钟   50°C\n",
       "4 2014-10-19 07:12:36     关   关   关  29°C  0%    0    0分钟   50°C\n",
       "5 2014-10-19 07:16:02     关   关   关  30°C  0%    0    0分钟   50°C\n",
       "6 2014-10-19 07:16:08     关   关   关  29°C  0%    0    0分钟   50°C\n",
       "7 2014-10-19 07:20:05     关   关   关  30°C  0%    0    0分钟   50°C\n",
       "8 2014-10-19 07:20:10     关   关   关  29°C  0%    0    0分钟   50°C\n",
       "9 2014-10-19 07:21:53     关   关   关  30°C  0%    0    0分钟   50°C"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[u'发生时间'] = pd.to_datetime(data[u'发生时间'], format = '%Y%m%d%H%M%S')#将时间列转成日期格式（***）\n",
    "print len(data)\n",
    "# 由后文知，此条件无用\n",
    "# data1 = data[(data[u'开关机状态']==u'开')|(data[u'水流量']!=0)]\n",
    "# data1.head()\n",
    "data.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "data.to_excel('data_guiyue.xlsx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
